"""Service providing MIS reports using pandas and Plotly-friendly data."""
from __future__ import annotations

from datetime import date, datetime
from typing import Dict, Tuple, List, Any

import pandas as pd

# Local imports (no package prefix)
from repositories.task_repo import TaskRepository
from repositories.assignment_repo import AssignmentRepository
from repositories.employee_repo import EmployeeRepository


def _get_all(repo: Any) -> List[Dict[str, Any]]:
    """
    Fetch every record from *repo* without depending on a concrete repository API.

    The first callable attribute among ``list_all``, ``get_all`` and ``all`` is
    invoked with no arguments. If none of them is callable, ``repo.table.all()``
    is used as a fallback and each row is copied into a plain ``dict``.

    Args:
        repo: Any object exposing one of the access paths described above.

    Returns:
        The fetched records as a list. A list returned by the repository is
        passed through as-is; any other iterable is materialised into a new
        list. An empty list is returned when the fetcher yields ``None`` or when
        no access path is available.
    """
    for name in ("list_all", "get_all", "all"):
        fetch = getattr(repo, name, None)
        # An attribute that merely shares the name (e.g. a data field) is not a fetcher.
        if not callable(fetch):
            continue
        rows = fetch()
        if rows is None:
            # Treat "no result" as "no rows" instead of failing in list(None).
            return []
        return rows if isinstance(rows, list) else list(rows)

    table = getattr(repo, "table", None)  # TinyDB table fallback
    if table is not None:
        return [dict(row) for row in table.all()]
    return []


class ReportService:
    """Computes summary metrics and tabular data for dashboards and exports.

    Every public report reloads the employee, assignment and task records from
    the repositories, so each call reflects the data stored at that moment.
    """

    # Status labels that every status summary reports, even with a zero count.
    _STATUSES = ("Pending", "In Progress", "Completed", "Deferred")
    # Statuses that exclude an assignment from the delay report.
    _CLOSED_STATUSES = ("Completed", "Deferred")

    # Columns the reports rely on; guaranteed to exist in the loaded frames.
    _EMPLOYEE_COLUMNS = ("id", "emp_code", "full_name", "email", "is_active")
    _ASSIGNMENT_COLUMNS = (
        "id",
        "task_id",
        "employee_id",
        "assigned_on",
        "assigned_by",
        "status",
        "percent_complete",
        "remarks",
        "actual_completion_date",
    )
    _TASK_COLUMNS = (
        "id",
        "task_code",
        "title",
        "client_name",
        "service_type",
        "period",
        "due_date",
        "priority",
        "recurrence",
        "regulatory_reference",
        "notes",
    )

    def __init__(
        self,
        task_repo: TaskRepository | None = None,
        assignment_repo: AssignmentRepository | None = None,
        employee_repo: EmployeeRepository | None = None,
    ) -> None:
        """
        Store the repositories used by the reports.

        Any repository left as ``None`` is replaced by a default-constructed
        one. The check is ``is None`` rather than truthiness so that an injected
        repository that evaluates as falsy is still honoured.
        """
        self.task_repo = task_repo if task_repo is not None else TaskRepository()
        self.assignment_repo = (
            assignment_repo if assignment_repo is not None else AssignmentRepository()
        )
        self.employee_repo = (
            employee_repo if employee_repo is not None else EmployeeRepository()
        )

    # ---------- internal loaders ----------
    @staticmethod
    def _frame_with_columns(rows: List[Dict[str, Any]], columns: Tuple[str, ...]) -> pd.DataFrame:
        """Build a DataFrame from *rows* that is guaranteed to contain *columns*.

        An empty input yields an empty frame with exactly *columns*. Otherwise
        every expected column that no record provided is added and filled with
        ``None``, so later column lookups cannot raise ``KeyError``.
        """
        frame = pd.DataFrame(rows)
        if frame.empty:
            return pd.DataFrame(columns=list(columns))
        for name in columns:
            if name not in frame.columns:
                frame[name] = None
        return frame

    @staticmethod
    def _to_naive_datetime(values: pd.Series) -> pd.Series:
        """Parse *values* (e.g. ISO strings) into timezone-naive timestamps.

        Unparseable entries become ``NaT``. Timezone-aware results have their
        timezone dropped while keeping the local wall-clock time. If the plain
        parse fails or does not produce a datetime column (as can happen with
        mixed UTC offsets), the values are re-parsed as UTC before the timezone
        is dropped.
        """
        try:
            parsed = pd.to_datetime(values, errors="coerce")
        except (ValueError, TypeError):
            parsed = None
        if parsed is None or not pd.api.types.is_datetime64_any_dtype(parsed):
            parsed = pd.to_datetime(values, errors="coerce", utc=True)
        if parsed.dt.tz is not None:
            parsed = parsed.dt.tz_localize(None)
        return parsed

    @classmethod
    def _build_frames(
        cls,
        employees: List[Dict[str, Any]],
        assignments: List[Dict[str, Any]],
        tasks: List[Dict[str, Any]],
    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """Turn raw records into ``(emp_df, assign_df, task_df)``.

        Each frame contains at least its expected columns, and the date columns
        (``due_date``, ``assigned_on``, ``actual_completion_date``) are
        normalised to timezone-naive timestamps.
        """
        emp_df = cls._frame_with_columns(employees, cls._EMPLOYEE_COLUMNS)
        assign_df = cls._frame_with_columns(assignments, cls._ASSIGNMENT_COLUMNS)
        task_df = cls._frame_with_columns(tasks, cls._TASK_COLUMNS)

        # Normalize dates that may be stored as ISO strings
        task_df["due_date"] = cls._to_naive_datetime(task_df["due_date"])
        for column in ("assigned_on", "actual_completion_date"):
            assign_df[column] = cls._to_naive_datetime(assign_df[column])

        return emp_df, assign_df, task_df

    def _load_dataframes(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """Load all three repositories and return ``(emp_df, assign_df, task_df)``."""
        return self._build_frames(
            _get_all(self.employee_repo),
            _get_all(self.assignment_repo),
            _get_all(self.task_repo),
        )

    # ---------- public reports ----------
    def status_summary(self) -> Dict[str, int]:
        """
        Returns counts per status across assignments.

        Keys Pending, In Progress, Completed and Deferred are always present
        (0 when unused); any other status found in the data is included too.
        Assignments without a status are counted as Pending.
        """
        _, assign_df, _ = self._load_dataframes()

        summary: Dict[str, int] = {}
        if not assign_df.empty:
            # Default missing status to Pending
            observed = assign_df["status"].fillna("Pending").value_counts()
            summary = {label: int(count) for label, count in observed.items()}

        # Ensure all keys exist
        for label in self._STATUSES:
            summary.setdefault(label, 0)
        return summary

    def workload_distribution(self) -> pd.DataFrame:
        """
        Returns DataFrame with columns:
          - employee (name)
          - tasks_assigned (count)

        Employees whose 'is_active' value is neither True nor missing are
        excluded. Rows follow the order of the employee records and the index
        is always 0..n-1.
        """
        emp_df, assign_df, _ = self._load_dataframes()
        if emp_df.empty:
            return pd.DataFrame(columns=["employee", "tasks_assigned"])

        # Keep employees flagged active, plus those with no flag at all.
        active = emp_df["is_active"]
        roster = emp_df.loc[active.eq(True) | active.isna(), ["id", "full_name"]]

        if assign_df.empty:
            result = roster.assign(tasks_assigned=0)
        else:
            # Count assignments by employee_id
            counts = assign_df.groupby("employee_id").size().reset_index(name="tasks_assigned")
            # Right join keeps every roster member, including those with no assignment.
            result = counts.merge(roster, left_on="employee_id", right_on="id", how="right")
            result["tasks_assigned"] = result["tasks_assigned"].fillna(0).astype(int)

        result = result.rename(columns={"full_name": "employee"})
        return result[["employee", "tasks_assigned"]].reset_index(drop=True)

    def delay_report(self) -> pd.DataFrame:
        """
        Returns a table of overdue assignments with columns:
          - employee
          - task_id
          - due_date
          - status
          - days_overdue

        An assignment is overdue when its task's due date is before today and
        its status is neither Completed nor Deferred (a missing status counts
        as Pending). Rows are sorted by days_overdue, largest first.
        """
        report_columns = ["employee", "task_id", "due_date", "status", "days_overdue"]

        emp_df, assign_df, task_df = self._load_dataframes()
        if assign_df.empty or task_df.empty:
            return pd.DataFrame(columns=report_columns)

        # Give the lookup keys unique names so the merges cannot collide with
        # assignment columns.
        due_dates = task_df[["id", "due_date"]].rename(columns={"id": "_task_key"})
        names = emp_df[["id", "full_name"]].rename(
            columns={"id": "_employee_key", "full_name": "employee"}
        )
        merged = (
            assign_df[["task_id", "employee_id", "status"]]
            .merge(due_dates, left_on="task_id", right_on="_task_key", how="left")
            .merge(names, left_on="employee_id", right_on="_employee_key", how="left")
        )

        # Overdue: due_date < today and not Completed/Deferred
        today = pd.Timestamp(date.today())
        merged["status"] = merged["status"].fillna("Pending")
        is_late = merged["due_date"].notna() & (merged["due_date"] < today)
        is_open = ~merged["status"].isin(self._CLOSED_STATUSES)
        overdue = merged[is_late & is_open].copy()

        if overdue.empty:
            return pd.DataFrame(columns=report_columns)

        overdue["days_overdue"] = (today - overdue["due_date"]).dt.days
        # mergesort is stable, so ties keep the order of the assignment records.
        return (
            overdue[report_columns]
            .sort_values(by="days_overdue", ascending=False, kind="mergesort")
            .reset_index(drop=True)
        )

    def compliance_calendar(self) -> pd.DataFrame:
        """
        Returns a simple calendar-style table with columns:
          - due_date (timezone-naive timestamp; NaT when missing or unparseable)
          - task_id
          - title
          - client_name

        Rows are sorted by due_date, then client_name, then title, with missing
        values last.
        """
        calendar_columns = ["due_date", "task_id", "title", "client_name"]

        _, _, task_df = self._load_dataframes()
        if task_df.empty:
            return pd.DataFrame(columns=calendar_columns)

        calendar = task_df[["due_date", "id", "title", "client_name"]].rename(
            columns={"id": "task_id"}
        )
        return calendar.sort_values(
            by=["due_date", "client_name", "title"], na_position="last"
        ).reset_index(drop=True)
